load("~/Documents/ISU/23Fall/STAT579/final project/data/border_clean.Rdata")
#load("C:/Users/rebek/github/packages/borderxing/data/border_clean.Rdata")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
#ggplotly()
#read data
# Keep only the US-Canada border records and derive calendar fields:
#   Year  - factor with one level per year, 1996 through 2023
#   date  - `Date` parsed as a month-year value
#   month - month taken from `date`
#   year  - year taken from `date`
Canada_border <- border %>%
  filter(Border == "US-Canada Border") %>%
  mutate(Year = factor(Year, levels = 1996:2023)) %>%
  mutate(date = lubridate::my(Date)) %>%
  mutate(
    month = month(date),
    year = year(date)
  )

#Port code: How many ports have a complete set of records? How many ports should we consider? Does Port_name change? There are 118 levels, but I only get 90 rows, and only 89 when I group_by Port_name.

#Eastport

# Number of records filed under each Port_code for the port named Eastport.
Canada_border %>%
  filter(Port_name == "Eastport") %>%
  count(Port_code)
## # A tibble: 0 × 2
## # ℹ 2 variables: Port_code <fct>, n <int>

Eastport has two Port_code values.

#Canada_border %>%
  #filter(Port_name == 'Eastport')%>%
  #group_by(Port_code)%>%
  #ggplot(aes(x = Year,y = Value,colour = Port_code))+geom_point()
#Canada_border %>%
  #filter(Port_name == 'Eastport')%>%
  #group_by(Port_code)%>%
  #ggplot(aes(x = Year,y = Value,colour = Port_code))+geom_boxplot()

State

# Value over time, one panel per state, all panels sharing one y-axis.
# x is a fractional year: year + (month - 1) / 12.
# ggplot2 does not use dplyr groups, so the former group_by(State) had no
# effect on the plot and is dropped; facet_wrap() does the splitting.
# NOTE(review): a panel probably holds several rows per month (one per port
# and measure), which geom_line() joins with vertical segments - confirm
# whether Value should be aggregated before plotting.
Canada_border %>%
  ggplot(aes(x = year + (month - 1) / 12, y = Value)) +
  geom_line() +
  facet_wrap(~State)

# Same panels, each with its own y-axis range.
Canada_border %>%
  ggplot(aes(x = year + (month - 1) / 12, y = Value)) +
  geom_line() +
  facet_wrap(~State, scales = "free_y")

#Measure

# Value over time, one panel per measure, all panels sharing one y-axis.
# x is a fractional year: year + (month - 1) / 12.
# ggplot2 does not use dplyr groups, so the former group_by(Measure) had no
# effect on the plot and is dropped; facet_wrap() does the splitting.
# NOTE(review): a panel probably holds several rows per month (one per
# port), which geom_line() joins with vertical segments - confirm whether
# Value should be aggregated before plotting.
Canada_border %>%
  ggplot(aes(x = year + (month - 1) / 12, y = Value)) +
  geom_line() +
  facet_wrap(~Measure)

# Same panels, each with its own y-axis range.
Canada_border %>%
  ggplot(aes(x = year + (month - 1) / 12, y = Value)) +
  geom_line() +
  facet_wrap(~Measure, scales = "free_y")

We can see some seasonal patterns in each measure.

How about grouping by State for a specific measure?

#Bus

# Bus crossings over time, one panel per state, shared y-axis.
# x is a fractional year: year + (month - 1) / 12.
# ggplot2 does not use dplyr groups, so the former group_by(State) had no
# effect on the plot and is dropped; facet_wrap() does the splitting.
# NOTE(review): a state with several ports has several rows per month, which
# geom_line() joins with vertical segments - confirm whether Value should be
# aggregated before plotting.
Canada_border %>%
  filter(Measure == "Buses") %>%
  ggplot(aes(x = year + (month - 1) / 12, y = Value)) +
  geom_line() +
  facet_wrap(~State)

# Same panels for every state, each with its own y-axis range
# (original note here: "montana Bus").
Canada_border %>%
  filter(Measure == "Buses") %>%
  ggplot(aes(x = year + (month - 1) / 12, y = Value)) +
  geom_line() +
  facet_wrap(~State, scales = "free_y")

#Personal Vehicles

# Personal-vehicle crossings over time, one panel per state, shared y-axis.
# x is a fractional year: year + (month - 1) / 12.
# ggplot2 does not use dplyr groups, so the former group_by(State) had no
# effect on the plot and is dropped; facet_wrap() does the splitting.
# NOTE(review): a state with several ports has several rows per month, which
# geom_line() joins with vertical segments - confirm whether Value should be
# aggregated before plotting.
Canada_border %>%
  filter(Measure == "Personal Vehicles") %>%
  ggplot(aes(x = year + (month - 1) / 12, y = Value)) +
  geom_line() +
  facet_wrap(~State)

# Same panels, each with its own y-axis range.
Canada_border %>%
  filter(Measure == "Personal Vehicles") %>%
  ggplot(aes(x = year + (month - 1) / 12, y = Value)) +
  geom_line() +
  facet_wrap(~State, scales = "free_y")

#Personal Vehicle Passengers

# Personal-vehicle passenger counts over time, one panel per state,
# shared y-axis. x is a fractional year: year + (month - 1) / 12.
# ggplot2 does not use dplyr groups, so the former group_by(State) had no
# effect on the plot and is dropped; facet_wrap() does the splitting.
# NOTE(review): a state with several ports has several rows per month, which
# geom_line() joins with vertical segments - confirm whether Value should be
# aggregated before plotting.
Canada_border %>%
  filter(Measure == "Personal Vehicle Passengers") %>%
  ggplot(aes(x = year + (month - 1) / 12, y = Value)) +
  geom_line() +
  facet_wrap(~State)

# Same panels, each with its own y-axis range.
Canada_border %>%
  filter(Measure == "Personal Vehicle Passengers") %>%
  ggplot(aes(x = year + (month - 1) / 12, y = Value)) +
  geom_line() +
  facet_wrap(~State, scales = "free_y")

I think Personal Vehicles is a major factor behind the overall pattern.

# Build the per-state time-series plot shared by every comparison below:
# Value against fractional year (year + (month - 1) / 12), one panel per
# state, each panel with its own y-axis range.
# The original pipelines called group_by(State) first; ggplot2 does not use
# dplyr groups, so that step had no effect on the plot and is omitted.
plot_value_by_state <- function(data) {
  ggplot(data, aes(x = year + (month - 1) / 12, y = Value)) +
    geom_line() +
    facet_wrap(~State, scales = "free_y")
}

# All measures together.
Among_State <- plot_value_by_state(Canada_border)

# One plot per measure of interest.
Bus_Among_State <- Canada_border %>%
  filter(Measure == "Buses") %>%
  plot_value_by_state()

Personal_Vehicles_Among_State <- Canada_border %>%
  filter(Measure == "Personal Vehicles") %>%
  plot_value_by_state()

Personal_Vehicles_Passengers_Among_State <- Canada_border %>%
  filter(Measure == "Personal Vehicle Passengers") %>%
  plot_value_by_state()

# Stack the all-measures plot above each single-measure plot for comparison.
grid.arrange(Among_State, Bus_Among_State, nrow = 2)

grid.arrange(Among_State, Personal_Vehicles_Among_State, nrow = 2)

grid.arrange(Among_State, Personal_Vehicles_Passengers_Among_State, nrow = 2)

#Consider average in each month

# Mean Value for every Year/Month combination, drawn as one line per year.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned. summarise() keeps its default grouping (result stays grouped
# by Year), so the informational message about `.groups` is still emitted.
Canada_border %>%
  group_by(Year, Month) %>%
  summarise(average_value = mean(Value, na.rm = TRUE)) %>%
  ggplot(
    aes(x = factor(Month), y = average_value, group = Year, colour = Year)
  ) +
  geom_line()
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.

# Mean Value for every Year/Month combination, one small panel per year.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned. summarise() keeps its default grouping (result stays grouped
# by Year), so the informational message about `.groups` is still emitted.
Canada_border %>%
  group_by(Year, Month) %>%
  summarise(average_value = mean(Value, na.rm = TRUE)) %>%
  ggplot(aes(x = factor(Month), y = average_value, colour = Year)) +
  geom_line(aes(group = Year)) +
  facet_wrap(~Year) +
  # Hide the x-axis title, tick labels and tick marks in every panel.
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.

#Montana

# Bus crossings over time for Montana only.
# x is a fractional year: year + (month - 1) / 12.
# ggplot2 does not use dplyr groups, so the former group_by(State) had no
# effect on the plot and is dropped.
# NOTE(review): if Montana has more than one port there are several rows per
# month, which geom_line() joins with vertical segments - confirm whether
# Value should be aggregated before plotting.
Canada_border %>%
  filter(Measure == "Buses", State == "Montana") %>%
  ggplot(aes(x = year + (month - 1) / 12, y = Value)) +
  geom_line()

#Nov 2000, value 124. I think it is because of the election (Nov 7, 2000). I found that the dates are not sorted; I think we should sort them before analyzing the plot.

#7 years to a group

# Mean Value per Month over the rows of `data` whose Year is in `years`.
#   data  - data frame with Year, Month and Value columns
#   years - year labels to keep, compared against Year with %in%
# Returns one row per Month with the column `average_value`
# (missing Values are ignored).
monthly_average_for_years <- function(data, years) {
  data %>%
    filter(Year %in% years) %>%
    group_by(Month) %>%
    summarise(average_value = mean(Value, na.rm = TRUE))
}

# Four consecutive 7-year windows covering 1996-2023.
Group_1 <- monthly_average_for_years(Canada_border, as.character(1996:2002))
Group_2 <- monthly_average_for_years(Canada_border, as.character(2003:2009))
Group_3 <- monthly_average_for_years(Canada_border, as.character(2010:2016))
Group_4 <- monthly_average_for_years(Canada_border, as.character(2017:2023))

# One line per window; `group = 1` joins all months of a window into a single
# path, and the constant colour label ties each line to its legend entry.
ggplot() +
  geom_line(
    data = Group_1,
    aes(x = Month, y = average_value, group = 1, colour = "1996-2002")
  ) +
  geom_line(
    data = Group_2,
    aes(x = Month, y = average_value, group = 1, colour = "2003-2009")
  ) +
  geom_line(
    data = Group_3,
    aes(x = Month, y = average_value, group = 1, colour = "2010-2016")
  ) +
  geom_line(
    data = Group_4,
    aes(x = Month, y = average_value, group = 1, colour = "2017-2023")
  ) +
  scale_color_manual(
    name = "Year Group",
    values = c(
      "1996-2002" = "steelblue",
      "2003-2009" = "coral2",
      "2010-2016" = "green",
      "2017-2023" = "orange"
    )
  )